import pandas as pd
import json
# Path of the JSON file holding the tokenized text pairs to label.
INPUT = "data/tokens2_train.json"
# Path of the CSV file that receives the single "hardcode" feature column.
OUTPUT = "feature_data/hardcode_feature.csv"


def _contains_either_way(text1, text2, str1, str2):
    """Return True if one text contains *str1* and the other contains *str2*.

    The check is symmetric: it succeeds when ``str1`` is in ``text1`` and
    ``str2`` is in ``text2``, or when the roles of the two texts are swapped.
    """
    return (str1 in text1 and str2 in text2) or (
        str1 in text2 and str2 in text1
    )


def _hardcode_label(text1, text2):
    """Return the hand-written rule label (1, -1 or 0) for a pair of texts.

    Rules are tried in order and the first match wins:

    * ``1``  -- one text contains "谁写的" and the other "谁的著作".
    * ``-1`` -- one text contains "古代流放" and the other "流放".
    * ``0``  -- neither rule matches.
    """
    if _contains_either_way(text1, text2, "谁写的", "谁的著作"):
        return 1
    if _contains_either_way(text1, text2, "古代流放", "流放"):
        return -1
    return 0


# Use a context manager so the handle is closed deterministically, and decode
# explicitly as UTF-8 (the JSON interchange encoding) instead of relying on
# the platform's locale default, which breaks on non-UTF-8 systems.
with open(INPUT, encoding="utf-8") as token_file:
    tokens = json.load(token_file)

# Each entry is indexed at 0 and 1; both sides are sequences of strings that
# are concatenated back into a single string before substring matching.
label_list = [
    _hardcode_label("".join(pair[0]), "".join(pair[1])) for pair in tokens
]

features_df = pd.DataFrame(label_list, columns=["hardcode"])
features_df.to_csv(OUTPUT, encoding="utf-8", index=False)
